#---------------------------------------------------------------------------------#
### information: 
#---------------------------------------------------------------------------------#
### authors: björn bremer (bremer@mpifg.de), reto bürgisser (buergisser@ipz.uzh.ch)
### last updated: 22 October 2021

#---------------------------------------------------------------------------------#
### description
#---------------------------------------------------------------------------------#

### this file replicates the analysis of the following paper:
### "Public Opinion on Welfare State Recalibration in Times of Austerity"
### this file focuses on the results from the CONJOINT EXPERIMENT shown in the MAIN TEXT
### NOTE: The file uses code for parallelization that runs on MacOS and Unix (parallel::mclapply()). 
### NOTE: The code needs to be adjusted to run on Windows (parallel::parLapply()).

#---------------------------------------------------------------------------------#

RNGkind("L'Ecuyer-CMRG")
set.seed(1234567)

#--------------------------------------------------------
### load libraries ###
#--------------------------------------------------------

#library(rstudioapi)
library(plyr)
library(dplyr)
library(ggplot2) 
library(ggpubr)    # necessary to combine plots
library(glmnet)    # necessary for ridge regression
library(parallel)  # necessary for parallel computing
library(doMC)      # necessary for parallel computing
library(broom)     # necessary to get tidy regression output
library(xtable)    # necessary to create and print tex table
library(reshape2)  # necessary to reshape the data inside the function

#--------------------------------------------------------
### set the number of cores ###
#--------------------------------------------------------

registerDoMC(cores = 4)

#--------------------------------------------------------
### load data ###
#--------------------------------------------------------

load("df_cj_psrm_replication.Rdata") # load the file

#--------------------------------------------------------
### bootstrap functions to calculate coefficients and marginal means with ridge regression ###
#--------------------------------------------------------

B <- 1000  # n of bootstrap draws
# B <- 2 ## to test

## function to calculate acmes
boot.ridge <- function(x, data){
   a <- unique(data$Response.ID)
   rid.b <- sample(a, length(a), repl = TRUE)
   ind.b <- c()
   for(i in 1:length(rid.b)) ind.b <- c(ind.b, which(data$Response.ID %in% rid.b[i]))
   DD.b <- data[ind.b,]
   x.b <- model.matrix(~ Pensions + Education + PLMP + ALMP + Child.benefits + Childcare, data=DD.b, 
                       contrasts.arg=list(Pensions=contrasts(DD.b$Pensions, contrasts=TRUE),
                                          Education=contrasts(DD.b$Education, contrasts=TRUE),
                                          PLMP=contrasts(DD.b$PLMP, contrasts=TRUE),
                                          ALMP=contrasts(DD.b$ALMP, contrasts=TRUE),
                                          Child.benefits=contrasts(DD.b$Child.benefits, contrasts=TRUE),
                                          Childcare=contrasts(DD.b$Childcare)))
   y.b <- DD.b$selected
   fit.ridge.b <- glmnet(x.b, y.b, alpha = 0,
                         lambda = bestlambda.ridge)
   coefficient.b <- predict(fit.ridge.b, type = "coefficients", s = bestlambda.ridge)
   tidy.b <- tidy(fit.ridge.b)
}

## function to calculate marginal means
boot.ridge.mm <- function(x, data){
   a <- unique(data$Response.ID)
   rid.b <- sample(a, length(a), repl = TRUE)
   ind.b <- c()
   for(i in 1:length(rid.b)) ind.b <- c(ind.b, which(data$Response.ID %in% rid.b[i]))
   DD.b <- data[ind.b,]
   x.b <- model.matrix(~ Pensions + Education + PLMP + ALMP + Child.benefits + Childcare, data=DD.b, 
                       contrasts.arg=list(Pensions=contrasts(DD.b$Pensions, contrasts=TRUE),
                                          Education=contrasts(DD.b$Education, contrasts=TRUE),
                                          PLMP=contrasts(DD.b$PLMP, contrasts=TRUE),
                                          ALMP=contrasts(DD.b$ALMP, contrasts=TRUE),
                                          Child.benefits=contrasts(DD.b$Child.benefits, contrasts=TRUE),
                                          Childcare=contrasts(DD.b$Childcare)))
   y.b <- DD.b$selected
   fit.ridge.b <- glmnet(x.b, y.b, alpha = 0,
                         lambda = bestlambda.ridge)
   means.b <- predict(fit.ridge.b, x.b, type = "response", s = bestlambda.ridge)
   mm.b <- data.frame(x.b, fitted = means.b[, 1L, drop = TRUE])
   mm.b$PensionsNochange <- ifelse(mm.b$PensionsIncrease == 0 & mm.b$PensionsDecrease == 0, 1, 0)
   mm.b$EducationNochange <- ifelse(mm.b$EducationIncrease == 0 & mm.b$EducationDecrease == 0, 1, 0)
   mm.b$PLMPNochange <- ifelse(mm.b$PLMPIncrease == 0 & mm.b$PLMPDecrease == 0, 1,0)
   mm.b$ALMPNochange <- ifelse(mm.b$ALMPIncrease == 0 & mm.b$ALMPDecrease == 0, 1,0)
   mm.b$Child.benefitsNochange <- ifelse(mm.b$Child.benefitsIncrease == 0 & mm.b$Child.benefitsDecrease == 0, 1,0)
   mm.b$ChildcareNochange <- ifelse(mm.b$ChildcareIncrease == 0 & mm.b$ChildcareDecrease == 0, 1,0)
   mm_long.b <- melt(mm.b, id.vars = c("fitted"))
   mm_sum.b <- ddply(mm_long.b, c("variable", "value"), summarise,
                     mean=mean(fitted))
   mm_sum.b <- mm_sum.b[which (mm_sum.b$value == 1),]
}

#--------------------------------------------------------
### figure 4: amces from the conjoint survey experiment ###
#--------------------------------------------------------

# create a matrix from the dataset
x <- model.matrix(~ Pensions + Education + PLMP + ALMP + Child.benefits + Childcare, data=df_cj, 
                  contrasts.arg=list(Pensions=contrasts(df_cj$Pensions, contrasts=TRUE),
                                     Education=contrasts(df_cj$Education, contrasts=TRUE),
                                     PLMP=contrasts(df_cj$PLMP, contrasts=TRUE),
                                     ALMP=contrasts(df_cj$ALMP, contrasts=TRUE),
                                     Child.benefits=contrasts(df_cj$Child.benefits, contrasts=TRUE),
                                     Childcare=contrasts(df_cj$Childcare)))

# create a dv
y <- df_cj$selected

# define the number of folds
foldid <- sample(1:10,size=length(y),replace=TRUE)

# find the optimal lambda
cv.fit.ridge <- cv.glmnet(x, y, alpha = 0, parallel = TRUE, foldid = foldid, 
                          lambda = 10^seq(10,-2,length=200))
plot(cv.fit.ridge)
bestlambda.ridge <- cv.fit.ridge$lambda.min

# use the function to calculate results
start_time <- Sys.time()
out_coef <- mclapply(1:B, boot.ridge, data = df_cj, mc.cores = 4)
end_time <- Sys.time()
time_taken <- end_time - start_time
print(time_taken)

# transform output into a dataframe
df_out_coef <- do.call(rbind.data.frame, out_coef)

# calculate mean and higher/lower CI of the coefficients
alpha = .05

coef_sum <- ddply(df_out_coef, c("term"), summarise,
                  mean = mean(estimate),
                  low=quantile(estimate, alpha / 2),
                  high=quantile(estimate, 1 - alpha / 2))
print(coef_sum)

# include the baseline as a level (only for the coefficients)
ridge_baselines <- data.frame(term = c("EducationNochange", "PensionsNochange", "ALMPNochange", 
                                       "PLMPNochange", "ChildcareNochange", "Child.benefitsNochange"),
                              mean = c(0,0,0,0,0,0), low = c(0,0,0,0,0,0), high = c(0,0,0,0,0,0))

coef_sum <- rbind(coef_sum, ridge_baselines)

# include the attribute name as a level
ridge_attributes <- data.frame(term = c("(Education)", "(Pensions)", "(ALMP)", "(PLMP)", "(Childcare)", "(Child.benefits)"),
                               mean = c(NA, NA, NA, NA, NA, NA), low = c(NA, NA, NA, NA, NA, NA), high = c(NA, NA, NA, NA, NA, NA))
coef_sum <- rbind(coef_sum, ridge_attributes)

# rename the levels
coef_sum$names_final <- ifelse(coef_sum$term == "EducationDecrease", "Decrease education spending",
                               ifelse(coef_sum$term == "EducationIncrease", "Increase education spending",
                                      ifelse(coef_sum$term == "PensionsDecrease", "Decrease pension spending",
                                             ifelse(coef_sum$term == "PensionsIncrease", "Increase pension spending",
                                                    ifelse(coef_sum$term == "ALMPDecrease", "Decrease ALMP spending",
                                                           ifelse(coef_sum$term == "ALMPIncrease", "Increase ALMP spending",
                                                                  ifelse(coef_sum$term == "PLMPDecrease", "Decrease PLMP spending",
                                                                         ifelse(coef_sum$term == "PLMPIncrease", "Increase PLMP spending",
                                                                                ifelse(coef_sum$term == "ChildcareDecrease", "Decrease childcare spending",
                                                                                       ifelse(coef_sum$term == "ChildcareIncrease", "Increase childcare spending",
                                                                                              ifelse(coef_sum$term == "Child.benefitsDecrease", "Decrease child benefit spending",
                                                                                                     ifelse(coef_sum$term == "Child.benefitsIncrease", "Increase child benefit spending", "Intercept"))))))))))))

# exclude the intercept from the df
coef_sum <- subset(coef_sum, term!= "(Intercept)")

# group the levels by attribute
coef_sum$group <- ifelse(coef_sum$term == "(Education)" | coef_sum$term == "EducationNochange" | coef_sum$term == "EducationIncrease" | coef_sum$term == "EducationDecrease", 1,
                         ifelse(coef_sum$term == "(Pensions)" | coef_sum$term == "PensionsNochange" | coef_sum$term == "PensionsIncrease" | coef_sum$term == "PensionsDecrease", 2,
                                ifelse(coef_sum$term == "(ALMP)" | coef_sum$term ==  "ALMPNochange" | coef_sum$term == "ALMPIncrease" | coef_sum$term == "ALMPDecrease", 3,
                                       ifelse(coef_sum$term == "(PLMP)" | coef_sum$term == "PLMPNochange" | coef_sum$term == "PLMPIncrease" | coef_sum$term == "PLMPDecrease", 4,
                                              ifelse(coef_sum$term == "(Childcare)" | coef_sum$term == "ChildcareNochange" | coef_sum$term == "ChildcareIncrease" | coef_sum$term =="ChildcareDecrease", 5, 6)))))

# order the factored variable
coef_sum$names_ordered <- factor(coef_sum$term, levels = c("Child.benefitsDecrease","Child.benefitsIncrease","Child.benefitsNochange","(Child.benefits)",
                                                           "ChildcareDecrease","ChildcareIncrease","ChildcareNochange", "(Childcare)", 
                                                           "PLMPDecrease","PLMPIncrease","PLMPNochange","(PLMP)",
                                                           "ALMPDecrease","ALMPIncrease","ALMPNochange","(ALMP)", 
                                                           "PensionsDecrease","PensionsIncrease","PensionsNochange","(Pensions)",
                                                           "EducationDecrease","EducationIncrease","EducationNochange","(Education)"
))

coefplot_all <- ggplot(coef_sum,aes(x = names_ordered)) +
   geom_errorbar(aes(ymin= low, ymax=high), width=0, size=1) +
   geom_point(aes(y=mean), size=1.5) +
   geom_hline(yintercept = 0,size=.5,colour="black",linetype="dotted") +
   coord_flip() +
   theme_bw() + 
   scale_x_discrete(labels = c("Decrease","Increase","No change","Child benefits",
                               "Decrease","Increase","No change","Childcare",
                               "Decrease","Increase","No change","PLMP",
                               "Decrease","Increase","No change","ALMP",
                               "Decrease","Increase","No change","Pension",
                               "Decrease","Increase","No change", "Education")) +
   theme(axis.text.y = element_text(face = c('plain', 'plain', 'plain', 'bold',
                                             'plain', 'plain', 'plain', 'bold',
                                             'plain', 'plain', 'plain', 'bold',
                                             'plain', 'plain', 'plain', 'bold',
                                             'plain', 'plain', 'plain', 'bold',
                                             'plain', 'plain', 'plain', 'bold'))) +
   theme(legend.position="none") +
   xlab("") + ylab("Change in Pr(Support for reform package)")
coefplot_all

ggsave(coefplot_all, width = 15, height = 12, units = c("cm"), file ="figure4.eps")

#--------------------------------------------------------
### figure 5: estimated marginal means of policy constituency vs. non-constituency ###
#--------------------------------------------------------

### calculate the marginal means

## pension constituency: retired vs. non-retired
###-----------------------------------
## retired

# create a matrix from the dataset
x <- model.matrix(~ Pensions + Education + PLMP + ALMP + Child.benefits + Childcare, data=df_cj[which (df_cj$retired == "1"),], 
                  contrasts.arg=list(Pensions=contrasts(df_cj$Pensions, contrasts=TRUE),
                                     Education=contrasts(df_cj$Education, contrasts=TRUE),
                                     PLMP=contrasts(df_cj$PLMP, contrasts=TRUE),
                                     ALMP=contrasts(df_cj$ALMP, contrasts=TRUE),
                                     Child.benefits=contrasts(df_cj$Child.benefits, contrasts=TRUE),
                                     Childcare=contrasts(df_cj$Childcare)))

# create a dv
y <- df_cj[which (df_cj$retired == "1"),]$selected

# define the number of folds
foldid <- sample(1:10,size=length(y),replace=TRUE)

# find the optimal lambda
cv.fit.ridge <- cv.glmnet(x, y, alpha = 0, parallel = TRUE, foldid = foldid, 
                          lambda = 10^seq(10,-2,length=200))
plot(cv.fit.ridge)
bestlambda.ridge <- cv.fit.ridge$lambda.min

# use the function to calculate marginal means
start_time <- Sys.time()
out_mm_retired_1 <- mclapply(1:B, boot.ridge.mm, data = df_cj[which (df_cj$retired == "1"),], mc.cores = 4)
end_time <- Sys.time()
time_taken <- end_time - start_time
print(time_taken)

# transform output into a dataframe
df_out_mm_retired_1 <- do.call(rbind.data.frame, out_mm_retired_1)

# calculate mean and higher/lower CI of the marginal means
df_out_mm_retired_1$term <- as.character(df_out_mm_retired_1$variable)
df_out_mm_retired_1$estimate <- as.numeric(df_out_mm_retired_1$mean)

mm_sum_retired_1 <- ddply(df_out_mm_retired_1, c("term"), summarise,
                          mean = mean(estimate),
                          low=quantile(estimate, alpha / 2),
                          high=quantile(estimate, 1 - alpha / 2))
print(mm_sum_retired_1)

# include the attribute name as a level
ridge_attributes <- data.frame(term = c("(Education)", "(Pensions)", "(ALMP)", "(PLMP)", "(Childcare)", "(Child.benefits)"),
                               mean = c(NA, NA, NA, NA, NA, NA), low = c(NA, NA, NA, NA, NA, NA), high = c(NA, NA, NA, NA, NA, NA))

mm_sum_retired_1 <- rbind(mm_sum_retired_1, ridge_attributes)

###-----------------------------------
## non-retired

# create a matrix from the dataset
x <- model.matrix(~ Pensions + Education + PLMP + ALMP + Child.benefits + Childcare, data=df_cj[which (df_cj$retired == "0"),], 
                  contrasts.arg=list(Pensions=contrasts(df_cj$Pensions, contrasts=TRUE),
                                     Education=contrasts(df_cj$Education, contrasts=TRUE),
                                     PLMP=contrasts(df_cj$PLMP, contrasts=TRUE),
                                     ALMP=contrasts(df_cj$ALMP, contrasts=TRUE),
                                     Child.benefits=contrasts(df_cj$Child.benefits, contrasts=TRUE),
                                     Childcare=contrasts(df_cj$Childcare)))

# create a dv
y <- df_cj[which (df_cj$retired == "0"),]$selected

# define the number of folds
foldid <- sample(1:10,size=length(y),replace=TRUE)

# find the optimal lambda
cv.fit.ridge <- cv.glmnet(x, y, alpha = 0, parallel = TRUE, foldid = foldid, 
                          lambda = 10^seq(10,-2,length=200))
plot(cv.fit.ridge)
bestlambda.ridge <- cv.fit.ridge$lambda.min

# use the function to calculate marginal means
start_time <- Sys.time()
out_mm_retired_0 <- mclapply(1:B, boot.ridge.mm, data = df_cj[which (df_cj$retired == "0"),], mc.cores = 4)
end_time <- Sys.time()
time_taken <- end_time - start_time
print(time_taken)

# transform output into a dataframe
df_out_mm_retired_0 <- do.call(rbind.data.frame, out_mm_retired_0)

# calculate mean and higher/lower CI of the marginal means
df_out_mm_retired_0$term <- as.character(df_out_mm_retired_0$variable)
df_out_mm_retired_0$estimate <- as.numeric(df_out_mm_retired_0$mean)

mm_sum_retired_0 <- ddply(df_out_mm_retired_0, c("term"), summarise,
                          mean = mean(estimate),
                          low=quantile(estimate, alpha / 2),
                          high=quantile(estimate, 1 - alpha / 2))
print(mm_sum_retired_0)

# include the attribute name as a level
ridge_attributes <- data.frame(term = c("(Education)", "(Pensions)", "(ALMP)", "(PLMP)", "(Childcare)", "(Child.benefits)"),
                               mean = c(NA, NA, NA, NA, NA, NA), low = c(NA, NA, NA, NA, NA, NA), high = c(NA, NA, NA, NA, NA, NA))
mm_sum_retired_0 <- rbind(mm_sum_retired_0, ridge_attributes)

###-------------------------------------------
## combine the mms from all groups
mm_sum_retired_1$Group <- "Constituency"
mm_sum_retired_0$Group <- "Non-constituency"

mm_sum_retired <- rbind(mm_sum_retired_1, mm_sum_retired_0)

# rename the levels
mm_sum_retired$names_final <- ifelse(mm_sum_retired$term == "EducationDecrease", "Decrease education spending",
                                      ifelse(mm_sum_retired$term == "EducationIncrease", "Increase education spending",
                                             ifelse(mm_sum_retired$term == "EducationNochange", "No change",
                                                    ifelse(mm_sum_retired$term == "PensionsDecrease", "Decrease pension spending",
                                                           ifelse(mm_sum_retired$term == "PensionsIncrease", "Increase pension spending",
                                                                  ifelse(mm_sum_retired$term == "PensionsNochange", "No change",
                                                                         ifelse(mm_sum_retired$term == "ALMPDecrease", "Decrease ALMP spending",
                                                                                ifelse(mm_sum_retired$term == "ALMPIncrease", "Increase ALMP spending",
                                                                                       ifelse(mm_sum_retired$term == "ALMPNochange", "No change",
                                                                                              ifelse(mm_sum_retired$term == "PLMPDecrease", "Decrease PLMP spending",
                                                                                                     ifelse(mm_sum_retired$term == "PLMPIncrease", "Increase PLMP spending",
                                                                                                            ifelse(mm_sum_retired$term == "PLMPNochange", "No change",
                                                                                                                   ifelse(mm_sum_retired$term == "ChildcareDecrease", "Decrease childcare spending",
                                                                                                                          ifelse(mm_sum_retired$term == "ChildcareIncrease", "Increase childcare spending",
                                                                                                                                 ifelse(mm_sum_retired$term == "ChildcareNochange", "No change",
                                                                                                                                        ifelse(mm_sum_retired$term == "Child.benefitsDecrease", "Decrease child benefit spending",
                                                                                                                                               ifelse(mm_sum_retired$term == "Child.benefitsIncrease", "Increase child benefit spending",
                                                                                                                                                      ifelse(mm_sum_retired$term == "Child.benefitsNochange", "No change", NA))))))))))))))))))

# exclude the intercept from the df
mm_sum_retired <- subset(mm_sum_retired, term!= "X.Intercept.")

# group the levels by attribute
mm_sum_retired$group <- ifelse(mm_sum_retired$term == "(Education)" | mm_sum_retired$term == "EducationNochange" | mm_sum_retired$term == "EducationIncrease" | mm_sum_retired$term == "EducationDecrease", 1,
                               ifelse(mm_sum_retired$term == "(Pensions)" | mm_sum_retired$term == "PensionsNochange" | mm_sum_retired$term == "PensionsIncrease" | mm_sum_retired$term == "PensionsDecrease", 2,
                                      ifelse(mm_sum_retired$term == "(ALMP)" | mm_sum_retired$term ==  "ALMPNochange" | mm_sum_retired$term == "ALMPIncrease" | mm_sum_retired$term == "ALMPDecrease", 3,
                                             ifelse(mm_sum_retired$term == "(PLMP)" | mm_sum_retired$term == "PLMPNochange" | mm_sum_retired$term == "PLMPIncrease" | mm_sum_retired$term == "PLMPDecrease", 4,
                                                    ifelse(mm_sum_retired$term == "(Childcare)" | mm_sum_retired$term == "ChildcareNochange" | mm_sum_retired$term == "ChildcareIncrease" | mm_sum_retired$term =="ChildcareDecrease", 5, 6)))))

# order the factored variable
mm_sum_retired$names_ordered <- factor(mm_sum_retired$term, levels = c("Child.benefitsDecrease","Child.benefitsIncrease","Child.benefitsNochange","(Child.benefits)",
                                                                       "ChildcareDecrease","ChildcareIncrease","ChildcareNochange", "(Childcare)", 
                                                                       "PLMPDecrease","PLMPIncrease","PLMPNochange","(PLMP)",
                                                                       "ALMPDecrease","ALMPIncrease","ALMPNochange","(ALMP)", 
                                                                       "PensionsDecrease","PensionsIncrease","PensionsNochange","(Pensions)",
                                                                       "EducationDecrease","EducationIncrease","EducationNochange","(Education)"
))

## education constituency: people in education and with young children vs. everyone else ###
#--------------------------------------------------------

###-----------------------------------
## education benefit

# create a matrix from the dataset
x <- model.matrix(~ Pensions + Education + PLMP + ALMP + Child.benefits + Childcare, data=df_cj[which (df_cj$eduben == 1),], 
                  contrasts.arg=list(Pensions=contrasts(df_cj$Pensions, contrasts=TRUE),
                                     Education=contrasts(df_cj$Education, contrasts=TRUE),
                                     PLMP=contrasts(df_cj$PLMP, contrasts=TRUE),
                                     ALMP=contrasts(df_cj$ALMP, contrasts=TRUE),
                                     Child.benefits=contrasts(df_cj$Child.benefits, contrasts=TRUE),
                                     Childcare=contrasts(df_cj$Childcare)))

# create a dv
y <- df_cj[which (df_cj$eduben == 1),]$selected

# define the number of folds
foldid <- sample(1:10,size=length(y),replace=TRUE)

# find the optimal lambda
cv.fit.ridge <- cv.glmnet(x, y, alpha = 0, parallel = TRUE, foldid = foldid, 
                          lambda = 10^seq(10,-2,length=200))
plot(cv.fit.ridge)
bestlambda.ridge <- cv.fit.ridge$lambda.min

# use the function to calculate marginal means
start_time <- Sys.time()
out_mm_eduben_1 <- mclapply(1:B, boot.ridge.mm, data = df_cj[which (df_cj$eduben == 1),], mc.cores = 4)
end_time <- Sys.time()
time_taken <- end_time - start_time
print(time_taken)

# transform output into a dataframe
df_out_mm_eduben_1 <- do.call(rbind.data.frame, out_mm_eduben_1)

# calculate mean and higher/lower CI of the marginal means
df_out_mm_eduben_1$term <- as.character(df_out_mm_eduben_1$variable)
df_out_mm_eduben_1$estimate <- as.numeric(df_out_mm_eduben_1$mean)

mm_sum_eduben_1 <- ddply(df_out_mm_eduben_1, c("term"), summarise,
                           mean = mean(estimate),
                           low=quantile(estimate, alpha / 2),
                           high=quantile(estimate, 1 - alpha / 2))
print(mm_sum_eduben_1)

# include the attribute name as a level
ridge_attributes <- data.frame(term = c("(Education)", "(Pensions)", "(ALMP)", "(PLMP)", "(Childcare)", "(Child.benefits)"),
                               mean = c(NA, NA, NA, NA, NA, NA), low = c(NA, NA, NA, NA, NA, NA), high = c(NA, NA, NA, NA, NA, NA))

mm_sum_eduben_1 <- rbind(mm_sum_eduben_1, ridge_attributes)

###-----------------------------------
## education non-benefit

# create a matrix from the dataset
x <- model.matrix(~ Pensions + Education + PLMP + ALMP + Child.benefits + Childcare, data=df_cj[which (df_cj$eduben == 0),], 
                  contrasts.arg=list(Pensions=contrasts(df_cj$Pensions, contrasts=TRUE),
                                     Education=contrasts(df_cj$Education, contrasts=TRUE),
                                     PLMP=contrasts(df_cj$PLMP, contrasts=TRUE),
                                     ALMP=contrasts(df_cj$ALMP, contrasts=TRUE),
                                     Child.benefits=contrasts(df_cj$Child.benefits, contrasts=TRUE),
                                     Childcare=contrasts(df_cj$Childcare)))

# create a dv
y <- df_cj[which (df_cj$eduben == 0),]$selected

# define the number of folds
foldid <- sample(1:10,size=length(y),replace=TRUE)

# find the optimal lambda
cv.fit.ridge <- cv.glmnet(x, y, alpha = 0, parallel = TRUE, foldid = foldid, 
                          lambda = 10^seq(10,-2,length=200))
plot(cv.fit.ridge)
bestlambda.ridge <- cv.fit.ridge$lambda.min

# use the function to calculate marginal means
start_time <- Sys.time()
out_mm_eduben_0 <- mclapply(1:B, boot.ridge.mm, data = df_cj[which (df_cj$eduben == 0),], mc.cores = 4)
end_time <- Sys.time()
time_taken <- end_time - start_time
print(time_taken)

# transform output into a dataframe
df_out_mm_eduben_0 <- do.call(rbind.data.frame, out_mm_eduben_0)

# calculate mean and higher/lower CI of the marginal means
df_out_mm_eduben_0$term <- as.character(df_out_mm_eduben_0$variable)
df_out_mm_eduben_0$estimate <- as.numeric(df_out_mm_eduben_0$mean)

mm_sum_eduben_0 <- ddply(df_out_mm_eduben_0, c("term"), summarise,
                           mean = mean(estimate),
                           low=quantile(estimate, alpha / 2),
                           high=quantile(estimate, 1 - alpha / 2))
print(mm_sum_eduben_0)

# include the attribute name as a level
ridge_attributes <- data.frame(term = c("(Education)", "(Pensions)", "(ALMP)", "(PLMP)", "(Childcare)", "(Child.benefits)"),
                               mean = c(NA, NA, NA, NA, NA, NA), low = c(NA, NA, NA, NA, NA, NA), high = c(NA, NA, NA, NA, NA, NA))

mm_sum_eduben_0 <- rbind(mm_sum_eduben_0, ridge_attributes)

###-------------------------------------------
## combine the mms from all groups

# fix the labels
mm_sum_eduben_1$Group <- "Constituency"
mm_sum_eduben_0$Group <- "Non-constituency"

mm_sum_eduben <- rbind(mm_sum_eduben_1, mm_sum_eduben_0)

# rename the levels
mm_sum_eduben$names_final <- ifelse(mm_sum_eduben$term == "EducationDecrease", "Decrease education spending",
                                      ifelse(mm_sum_eduben$term == "EducationIncrease", "Increase education spending",
                                             ifelse(mm_sum_eduben$term == "EducationNochange", "No change",
                                                    ifelse(mm_sum_eduben$term == "PensionsDecrease", "Decrease pension spending",
                                                           ifelse(mm_sum_eduben$term == "PensionsIncrease", "Increase pension spending",
                                                                  ifelse(mm_sum_eduben$term == "PensionsNochange", "No change",
                                                                         ifelse(mm_sum_eduben$term == "ALMPDecrease", "Decrease ALMP spending",
                                                                                ifelse(mm_sum_eduben$term == "ALMPIncrease", "Increase ALMP spending",
                                                                                       ifelse(mm_sum_eduben$term == "ALMPNochange", "No change",
                                                                                              ifelse(mm_sum_eduben$term == "PLMPDecrease", "Decrease PLMP spending",
                                                                                                     ifelse(mm_sum_eduben$term == "PLMPIncrease", "Increase PLMP spending",
                                                                                                            ifelse(mm_sum_eduben$term == "PLMPNochange", "No change",
                                                                                                                   ifelse(mm_sum_eduben$term == "ChildcareDecrease", "Decrease childcare spending",
                                                                                                                          ifelse(mm_sum_eduben$term == "ChildcareIncrease", "Increase childcare spending",
                                                                                                                                 ifelse(mm_sum_eduben$term == "ChildcareNochange", "No change",
                                                                                                                                        ifelse(mm_sum_eduben$term == "Child.benefitsDecrease", "Decrease child benefit spending",
                                                                                                                                               ifelse(mm_sum_eduben$term == "Child.benefitsIncrease", "Increase child benefit spending",
                                                                                                                                                      ifelse(mm_sum_eduben$term == "Child.benefitsNochange", "No change", NA))))))))))))))))))

# exclude the intercept from the df
mm_sum_eduben <- subset(mm_sum_eduben, term!= "X.Intercept.")

# group the levels by attribute
mm_sum_eduben$group <- ifelse(mm_sum_eduben$term == "(Education)" | mm_sum_eduben$term == "EducationNochange" | mm_sum_eduben$term == "EducationIncrease" | mm_sum_eduben$term == "EducationDecrease", 1,
                                ifelse(mm_sum_eduben$term == "(Pensions)" | mm_sum_eduben$term == "PensionsNochange" | mm_sum_eduben$term == "PensionsIncrease" | mm_sum_eduben$term == "PensionsDecrease", 2,
                                       ifelse(mm_sum_eduben$term == "(ALMP)" | mm_sum_eduben$term ==  "ALMPNochange" | mm_sum_eduben$term == "ALMPIncrease" | mm_sum_eduben$term == "ALMPDecrease", 3,
                                              ifelse(mm_sum_eduben$term == "(PLMP)" | mm_sum_eduben$term == "PLMPNochange" | mm_sum_eduben$term == "PLMPIncrease" | mm_sum_eduben$term == "PLMPDecrease", 4,
                                                     ifelse(mm_sum_eduben$term == "(Childcare)" | mm_sum_eduben$term == "ChildcareNochange" | mm_sum_eduben$term == "ChildcareIncrease" | mm_sum_eduben$term =="ChildcareDecrease", 5, 6)))))

# order the factored variable
mm_sum_eduben$names_ordered <- factor(mm_sum_eduben$term, levels = c("Child.benefitsDecrease","Child.benefitsIncrease","Child.benefitsNochange","(Child.benefits)",
                                                                         "ChildcareDecrease","ChildcareIncrease","ChildcareNochange", "(Childcare)", 
                                                                         "PLMPDecrease","PLMPIncrease","PLMPNochange","(PLMP)",
                                                                         "ALMPDecrease","ALMPIncrease","ALMPNochange","(ALMP)", 
                                                                         "PensionsDecrease","PensionsIncrease","PensionsNochange","(Pensions)",
                                                                         "EducationDecrease","EducationIncrease","EducationNochange","(Education)"
))

## almp + plmp constituency: outsider vs. non-outsiders everyone else 

###-----------------------------------
## outsider

# create a matrix from the dataset
x <- model.matrix(~ Pensions + Education + PLMP + ALMP + Child.benefits + Childcare, data=df_cj[which (df_cj$workctr == "Outsider"),], 
                  contrasts.arg=list(Pensions=contrasts(df_cj$Pensions, contrasts=TRUE),
                                     Education=contrasts(df_cj$Education, contrasts=TRUE),
                                     PLMP=contrasts(df_cj$PLMP, contrasts=TRUE),
                                     ALMP=contrasts(df_cj$ALMP, contrasts=TRUE),
                                     Child.benefits=contrasts(df_cj$Child.benefits, contrasts=TRUE),
                                     Childcare=contrasts(df_cj$Childcare)))

# create a dv
y <- df_cj[which (df_cj$workctr == "Outsider"),]$selected

# define the number of folds
foldid <- sample(1:10,size=length(y),replace=TRUE)

# find the optimal lambda
cv.fit.ridge <- cv.glmnet(x, y, alpha = 0, parallel = TRUE, foldid = foldid, 
                          lambda = 10^seq(10,-2,length=200))
plot(cv.fit.ridge)
bestlambda.ridge <- cv.fit.ridge$lambda.min

# use the function to calculate marginal means
start_time <- Sys.time()
out_mm_outsider_1 <- mclapply(1:B, boot.ridge.mm, data = df_cj[which (df_cj$workctr == "Outsider"),], mc.cores = 4)
end_time <- Sys.time()
time_taken <- end_time - start_time
print(time_taken)

# transform output into a dataframe
df_out_mm_outsider_1 <- do.call(rbind.data.frame, out_mm_outsider_1)

# calculate mean and higher/lower CI of the marginal means
df_out_mm_outsider_1$term <- as.character(df_out_mm_outsider_1$variable)
df_out_mm_outsider_1$estimate <- as.numeric(df_out_mm_outsider_1$mean)

mm_sum_outsider_1 <- ddply(df_out_mm_outsider_1, c("term"), summarise,
                           mean = mean(estimate),
                           low=quantile(estimate, alpha / 2),
                           high=quantile(estimate, 1 - alpha / 2))
print(mm_sum_outsider_1)

# include the attribute name as a level
ridge_attributes <- data.frame(term = c("(Education)", "(Pensions)", "(ALMP)", "(PLMP)", "(Childcare)", "(Child.benefits)"),
                               mean = c(NA, NA, NA, NA, NA, NA), low = c(NA, NA, NA, NA, NA, NA), high = c(NA, NA, NA, NA, NA, NA))

mm_sum_outsider_1 <- rbind(mm_sum_outsider_1, ridge_attributes)

###-----------------------------------
## non-outsiders

# create a matrix from the dataset
x <- model.matrix(~ Pensions + Education + PLMP + ALMP + Child.benefits + Childcare, data=df_cj[which (df_cj$workctr != "Outsider"),], 
                  contrasts.arg=list(Pensions=contrasts(df_cj$Pensions, contrasts=TRUE),
                                     Education=contrasts(df_cj$Education, contrasts=TRUE),
                                     PLMP=contrasts(df_cj$PLMP, contrasts=TRUE),
                                     ALMP=contrasts(df_cj$ALMP, contrasts=TRUE),
                                     Child.benefits=contrasts(df_cj$Child.benefits, contrasts=TRUE),
                                     Childcare=contrasts(df_cj$Childcare)))

# create a dv
y <- df_cj[which (df_cj$workctr != "Outsider"),]$selected

# define the number of folds
foldid <- sample(1:10,size=length(y),replace=TRUE)

# find the optimal lambda
cv.fit.ridge <- cv.glmnet(x, y, alpha = 0, parallel = TRUE, foldid = foldid, 
                          lambda = 10^seq(10,-2,length=200))
plot(cv.fit.ridge)
bestlambda.ridge <- cv.fit.ridge$lambda.min

# use the function to calculate marginal means
start_time <- Sys.time()
out_mm_outsider_0 <- mclapply(1:B, boot.ridge.mm, data = df_cj[which (df_cj$workctr != "Outsider"),], mc.cores = 4)
end_time <- Sys.time()
time_taken <- end_time - start_time
print(time_taken)

# transform output into a dataframe
df_out_mm_outsider_0 <- do.call(rbind.data.frame, out_mm_outsider_0)

# calculate mean and higher/lower CI of the marginal means
df_out_mm_outsider_0$term <- as.character(df_out_mm_outsider_0$variable)
df_out_mm_outsider_0$estimate <- as.numeric(df_out_mm_outsider_0$mean)

mm_sum_outsider_0 <- ddply(df_out_mm_outsider_0, c("term"), summarise,
                           mean = mean(estimate),
                           low=quantile(estimate, alpha / 2),
                           high=quantile(estimate, 1 - alpha / 2))
print(mm_sum_outsider_0)

# include the attribute name as a level
ridge_attributes <- data.frame(term = c("(Education)", "(Pensions)", "(ALMP)", "(PLMP)", "(Childcare)", "(Child.benefits)"),
                               mean = c(NA, NA, NA, NA, NA, NA), low = c(NA, NA, NA, NA, NA, NA), high = c(NA, NA, NA, NA, NA, NA))

mm_sum_outsider_0 <- rbind(mm_sum_outsider_0, ridge_attributes)

###-------------------------------------------
## combine the mms from all groups

mm_sum_outsider_1$Group <- "Constituency"
mm_sum_outsider_0$Group <- "Non-constituency"

mm_sum_outsider <- rbind(mm_sum_outsider_1, mm_sum_outsider_0)

# rename the levels
mm_sum_outsider$names_final <- ifelse(mm_sum_outsider$term == "EducationDecrease", "Decrease education spending",
                                      ifelse(mm_sum_outsider$term == "EducationIncrease", "Increase education spending",
                                             ifelse(mm_sum_outsider$term == "EducationNochange", "No change",
                                                    ifelse(mm_sum_outsider$term == "PensionsDecrease", "Decrease pension spending",
                                                           ifelse(mm_sum_outsider$term == "PensionsIncrease", "Increase pension spending",
                                                                  ifelse(mm_sum_outsider$term == "PensionsNochange", "No change",
                                                                         ifelse(mm_sum_outsider$term == "ALMPDecrease", "Decrease ALMP spending",
                                                                                ifelse(mm_sum_outsider$term == "ALMPIncrease", "Increase ALMP spending",
                                                                                       ifelse(mm_sum_outsider$term == "ALMPNochange", "No change",
                                                                                              ifelse(mm_sum_outsider$term == "PLMPDecrease", "Decrease PLMP spending",
                                                                                                     ifelse(mm_sum_outsider$term == "PLMPIncrease", "Increase PLMP spending",
                                                                                                            ifelse(mm_sum_outsider$term == "PLMPNochange", "No change",
                                                                                                                   ifelse(mm_sum_outsider$term == "ChildcareDecrease", "Decrease childcare spending",
                                                                                                                          ifelse(mm_sum_outsider$term == "ChildcareIncrease", "Increase childcare spending",
                                                                                                                                 ifelse(mm_sum_outsider$term == "ChildcareNochange", "No change",
                                                                                                                                        ifelse(mm_sum_outsider$term == "Child.benefitsDecrease", "Decrease child benefit spending",
                                                                                                                                               ifelse(mm_sum_outsider$term == "Child.benefitsIncrease", "Increase child benefit spending",
                                                                                                                                                      ifelse(mm_sum_outsider$term == "Child.benefitsNochange", "No change", NA))))))))))))))))))
# exclude the intercept from the df
mm_sum_outsider <- subset(mm_sum_outsider, term!= "X.Intercept.")

# group the levels by attribute
mm_sum_outsider$group <- ifelse(mm_sum_outsider$term == "(Education)" | mm_sum_outsider$term == "EducationNochange" | mm_sum_outsider$term == "EducationIncrease" | mm_sum_outsider$term == "EducationDecrease", 1,
                                ifelse(mm_sum_outsider$term == "(Pensions)" | mm_sum_outsider$term == "PensionsNochange" | mm_sum_outsider$term == "PensionsIncrease" | mm_sum_outsider$term == "PensionsDecrease", 2,
                                       ifelse(mm_sum_outsider$term == "(ALMP)" | mm_sum_outsider$term ==  "ALMPNochange" | mm_sum_outsider$term == "ALMPIncrease" | mm_sum_outsider$term == "ALMPDecrease", 3,
                                              ifelse(mm_sum_outsider$term == "(PLMP)" | mm_sum_outsider$term == "PLMPNochange" | mm_sum_outsider$term == "PLMPIncrease" | mm_sum_outsider$term == "PLMPDecrease", 4,
                                                     ifelse(mm_sum_outsider$term == "(Childcare)" | mm_sum_outsider$term == "ChildcareNochange" | mm_sum_outsider$term == "ChildcareIncrease" | mm_sum_outsider$term =="ChildcareDecrease", 5, 6)))))

# order the factored variable
mm_sum_outsider$names_ordered <- factor(mm_sum_outsider$term, levels = c("Child.benefitsDecrease","Child.benefitsIncrease","Child.benefitsNochange","(Child.benefits)",
                                                                         "ChildcareDecrease","ChildcareIncrease","ChildcareNochange", "(Childcare)", 
                                                                         "PLMPDecrease","PLMPIncrease","PLMPNochange","(PLMP)",
                                                                         "ALMPDecrease","ALMPIncrease","ALMPNochange","(ALMP)", 
                                                                         "PensionsDecrease","PensionsIncrease","PensionsNochange","(Pensions)",
                                                                         "EducationDecrease","EducationIncrease","EducationNochange","(Education)"
))

## child benefits + childcare constituencies: people with children vs. people with no children u10 ###
#--------------------------------------------------------

###-----------------------------------
## children u10

# create a matrix from the dataset
x <- model.matrix(~ Pensions + Education + PLMP + ALMP + Child.benefits + Childcare, data=df_cj[which (df_cj$children_u10 == 1),], 
                  contrasts.arg=list(Pensions=contrasts(df_cj$Pensions, contrasts=TRUE),
                                     Education=contrasts(df_cj$Education, contrasts=TRUE),
                                     PLMP=contrasts(df_cj$PLMP, contrasts=TRUE),
                                     ALMP=contrasts(df_cj$ALMP, contrasts=TRUE),
                                     Child.benefits=contrasts(df_cj$Child.benefits, contrasts=TRUE),
                                     Childcare=contrasts(df_cj$Childcare)))

# create a dv
y <- df_cj[which (df_cj$children_u10 == 1),]$selected

# define the number of folds
foldid <- sample(1:10,size=length(y),replace=TRUE)

# find the optimal lambda
cv.fit.ridge <- cv.glmnet(x, y, alpha = 0, parallel = TRUE, foldid = foldid, 
                          lambda = 10^seq(10,-2,length=200))
plot(cv.fit.ridge)
bestlambda.ridge <- cv.fit.ridge$lambda.min

# use the function to calculate marginal means
start_time <- Sys.time()
out_mm_children_u10_1 <- mclapply(1:B, boot.ridge.mm, data = df_cj[which (df_cj$children_u10 == 1),], mc.cores = 4)
end_time <- Sys.time()
time_taken <- end_time - start_time
print(time_taken)

# transform output into a dataframe
df_out_mm_children_u10_1 <- do.call(rbind.data.frame, out_mm_children_u10_1)

# calculate mean and higher/lower CI of the marginal means
df_out_mm_children_u10_1$term <- as.character(df_out_mm_children_u10_1$variable)
df_out_mm_children_u10_1$estimate <- as.numeric(df_out_mm_children_u10_1$mean)

mm_sum_children_u10_1 <- ddply(df_out_mm_children_u10_1, c("term"), summarise,
                               mean = mean(estimate),
                               low=quantile(estimate, alpha / 2),
                               high=quantile(estimate, 1 - alpha / 2))
print(mm_sum_children_u10_1)

# include the attribute name as a level
ridge_attributes <- data.frame(term = c("(Education)", "(Pensions)", "(ALMP)", "(PLMP)", "(Childcare)", "(Child.benefits)"),
                               mean = c(NA, NA, NA, NA, NA, NA), low = c(NA, NA, NA, NA, NA, NA), high = c(NA, NA, NA, NA, NA, NA))

mm_sum_children_u10_1 <- rbind(mm_sum_children_u10_1, ridge_attributes)

###-----------------------------------
## non- children_u10

# create a matrix from the dataset
x <- model.matrix(~ Pensions + Education + PLMP + ALMP + Child.benefits + Childcare, data=df_cj[which (df_cj$children_u10 == 0),], 
                  contrasts.arg=list(Pensions=contrasts(df_cj$Pensions, contrasts=TRUE),
                                     Education=contrasts(df_cj$Education, contrasts=TRUE),
                                     PLMP=contrasts(df_cj$PLMP, contrasts=TRUE),
                                     ALMP=contrasts(df_cj$ALMP, contrasts=TRUE),
                                     Child.benefits=contrasts(df_cj$Child.benefits, contrasts=TRUE),
                                     Childcare=contrasts(df_cj$Childcare)))

# create a dv
y <- df_cj[which (df_cj$children_u10 == 0),]$selected

# define the number of folds
foldid <- sample(1:10,size=length(y),replace=TRUE)

# find the optimal lambda
cv.fit.ridge <- cv.glmnet(x, y, alpha = 0, parallel = TRUE, foldid = foldid, 
                          lambda = 10^seq(10,-2,length=200))
plot(cv.fit.ridge)
bestlambda.ridge <- cv.fit.ridge$lambda.min

# use the function to calculate marginal means
start_time <- Sys.time()
out_mm_children_u10_0 <- mclapply(1:B, boot.ridge.mm, data = df_cj[which (df_cj$children_u10 == 0),], mc.cores = 4)
end_time <- Sys.time()
time_taken <- end_time - start_time
print(time_taken)

# transform output into a dataframe
df_out_mm_children_u10_0 <- do.call(rbind.data.frame, out_mm_children_u10_0)

# calculate mean and higher/lower CI of the marginal means
df_out_mm_children_u10_0$term <- as.character(df_out_mm_children_u10_0$variable)
df_out_mm_children_u10_0$estimate <- as.numeric(df_out_mm_children_u10_0$mean)

mm_sum_children_u10_0 <- ddply(df_out_mm_children_u10_0, c("term"), summarise,
                               mean = mean(estimate),
                               low=quantile(estimate, alpha / 2),
                               high=quantile(estimate, 1 - alpha / 2))
print(mm_sum_children_u10_0)

# include the attribute name as a level
ridge_attributes <- data.frame(term = c("(Education)", "(Pensions)", "(ALMP)", "(PLMP)", "(Childcare)", "(Child.benefits)"),
                               mean = c(NA, NA, NA, NA, NA, NA), low = c(NA, NA, NA, NA, NA, NA), high = c(NA, NA, NA, NA, NA, NA))

mm_sum_children_u10_0 <- rbind(mm_sum_children_u10_0, ridge_attributes)

###-------------------------------------------
## combine the mms from all groups
mm_sum_children_u10_1$Group <- "Constituency"
mm_sum_children_u10_0$Group <- "Non-constituency"

mm_sum_children_u10 <- rbind(mm_sum_children_u10_1, mm_sum_children_u10_0)

# rename the levels
mm_sum_children_u10$names_final <- ifelse(mm_sum_children_u10$term == "EducationDecrease", "Decrease education spending",
                                          ifelse(mm_sum_children_u10$term == "EducationIncrease", "Increase education spending",
                                                 ifelse(mm_sum_children_u10$term == "EducationNochange", "No change",
                                                        ifelse(mm_sum_children_u10$term == "PensionsDecrease", "Decrease pension spending",
                                                               ifelse(mm_sum_children_u10$term == "PensionsIncrease", "Increase pension spending",
                                                                      ifelse(mm_sum_children_u10$term == "PensionsNochange", "No change",
                                                                             ifelse(mm_sum_children_u10$term == "ALMPDecrease", "Decrease ALMP spending",
                                                                                    ifelse(mm_sum_children_u10$term == "ALMPIncrease", "Increase ALMP spending",
                                                                                           ifelse(mm_sum_children_u10$term == "ALMPNochange", "No change",
                                                                                                  ifelse(mm_sum_children_u10$term == "PLMPDecrease", "Decrease PLMP spending",
                                                                                                         ifelse(mm_sum_children_u10$term == "PLMPIncrease", "Increase PLMP spending",
                                                                                                                ifelse(mm_sum_children_u10$term == "PLMPNochange", "No change",
                                                                                                                       ifelse(mm_sum_children_u10$term == "ChildcareDecrease", "Decrease childcare spending",
                                                                                                                              ifelse(mm_sum_children_u10$term == "ChildcareIncrease", "Increase childcare spending",
                                                                                                                                     ifelse(mm_sum_children_u10$term == "ChildcareNochange", "No change",
                                                                                                                                            ifelse(mm_sum_children_u10$term == "Child.benefitsDecrease", "Decrease child benefit spending",
                                                                                                                                                   ifelse(mm_sum_children_u10$term == "Child.benefitsIncrease", "Increase child benefit spending",
                                                                                                                                                          ifelse(mm_sum_children_u10$term == "Child.benefitsNochange", "No change", NA))))))))))))))))))
# exclude the intercept from the df
mm_sum_children_u10 <- subset(mm_sum_children_u10, term!= "X.Intercept.")

# group the levels by attribute
mm_sum_children_u10$group <- ifelse(mm_sum_children_u10$term == "(Education)" | mm_sum_children_u10$term == "EducationNochange" | mm_sum_children_u10$term == "EducationIncrease" | mm_sum_children_u10$term == "EducationDecrease", 1,
                                    ifelse(mm_sum_children_u10$term == "(Pensions)" | mm_sum_children_u10$term == "PensionsNochange" | mm_sum_children_u10$term == "PensionsIncrease" | mm_sum_children_u10$term == "PensionsDecrease", 2,
                                           ifelse(mm_sum_children_u10$term == "(ALMP)" | mm_sum_children_u10$term ==  "ALMPNochange" | mm_sum_children_u10$term == "ALMPIncrease" | mm_sum_children_u10$term == "ALMPDecrease", 3,
                                                  ifelse(mm_sum_children_u10$term == "(PLMP)" | mm_sum_children_u10$term == "PLMPNochange" | mm_sum_children_u10$term == "PLMPIncrease" | mm_sum_children_u10$term == "PLMPDecrease", 4,
                                                         ifelse(mm_sum_children_u10$term == "(Childcare)" | mm_sum_children_u10$term == "ChildcareNochange" | mm_sum_children_u10$term == "ChildcareIncrease" | mm_sum_children_u10$term =="ChildcareDecrease", 5, 6)))))

# order the factored variable
mm_sum_children_u10$names_ordered <- factor(mm_sum_children_u10$term, levels = c("Child.benefitsDecrease","Child.benefitsIncrease","Child.benefitsNochange","(Child.benefits)",
                                                                                 "ChildcareDecrease","ChildcareIncrease","ChildcareNochange", "(Childcare)", 
                                                                                 "PLMPDecrease","PLMPIncrease","PLMPNochange","(PLMP)",
                                                                                 "ALMPDecrease","ALMPIncrease","ALMPNochange","(ALMP)", 
                                                                                 "PensionsDecrease","PensionsIncrease","PensionsNochange","(Pensions)",
                                                                                 "EducationDecrease","EducationIncrease","EducationNochange","(Education)"
))

###-------------------------------------------
### plot the graphs

# retired vs. non-retired
mmplot_retired <- ggplot(mm_sum_retired[which (mm_sum_retired$group == 1 | mm_sum_retired$group == 2 | mm_sum_retired$group == 4),], aes(x = names_ordered, colour = Group)) +
   geom_errorbar(aes(ymin= low, ymax=high), width=0, size=1, position = position_dodge(0.8)) +
   geom_point(aes(y=mean, shape = Group), size=2, position = position_dodge(0.8)) +
   geom_hline(yintercept = 0.5,size=.5,colour="black",linetype="dotted") +
   coord_flip() +
   theme_bw() + 
   scale_x_discrete(labels = c(
      "Decrease","Increase","No change","PLMP",
      "Decrease","Increase","No change","Pension",
      "Decrease","Increase","No change", "Education")) +
   theme(axis.text.y = element_text(face = c('plain', 'plain', 'plain', 'bold',
                                             'plain', 'plain', 'plain', 'bold',
                                             'plain', 'plain', 'plain', 'bold'))) +
   guides(colour = guide_legend(reverse = TRUE)) +
   guides(shape = guide_legend(reverse = TRUE)) +
   xlab("") + ylab("Marginal mean") +
   ggtitle("Pension") + theme(plot.title = element_text(hjust = 0.5, face = "bold")) +
   scale_colour_grey(start = 0.2, end = 0.8, aesthetics = "colour")
mmplot_retired

#ggsave(mmplot_retired, width = 17.5, height = 12, units = c("cm"), file ="mmplot_retired_small.eps")

## edu benefit vs. non-benefit
mmplot_edu <- ggplot(mm_sum_eduben[which (mm_sum_eduben$group == 1 | mm_sum_eduben$group == 2 | mm_sum_eduben$group == 4),],aes(x = names_ordered, colour = Group)) +
   geom_errorbar(aes(ymin= low, ymax=high), width=0, size=1, position = position_dodge(0.8)) +
   geom_point(aes(y=mean, shape = Group), size=2, position = position_dodge(0.8)) +
   geom_hline(yintercept = 0.5,size=.5,colour="black",linetype="dotted") +
   coord_flip() +
   theme_bw() + 
   scale_x_discrete(labels = c(
      "Decrease","Increase","No change","PLMP",
      "Decrease","Increase","No change","Pension",
      "Decrease","Increase","No change", "Education")) +
   theme(axis.text.y = element_text(face = c('plain', 'plain', 'plain', 'bold',
                                             'plain', 'plain', 'plain', 'bold',
                                             'plain', 'plain', 'plain', 'bold'))) +
   guides(colour = guide_legend(reverse = TRUE)) +
   guides(shape = guide_legend(reverse = TRUE)) +
   xlab("") + ylab("Marginal mean") +
   ggtitle("Education") + theme(plot.title = element_text(hjust = 0.5, face = "bold")) +
   scale_colour_grey(start = 0.2, end = 0.8, aesthetics = "colour") 
mmplot_edu

#ggsave(mmplot_edu, width = 17.5, height = 12, units = c("cm"), file ="mmplot_edu_new_small.eps")

#--------------------------------------------------------
## outsiders vs. non-outsiders
mmplot_outsider <- ggplot(mm_sum_outsider[which (mm_sum_outsider$group == 2 | mm_sum_outsider$group == 3 | mm_sum_outsider$group == 4),], aes(x = names_ordered, colour = Group)) +
   geom_errorbar(aes(ymin= low, ymax=high), width=0, size=1, position = position_dodge(0.8)) +
   geom_point(aes(y=mean, shape = Group), size=2, position = position_dodge(0.8)) +
   geom_hline(yintercept = 0.5,size=.5,colour="black",linetype="dotted") +
   coord_flip() +
   theme_bw() + 
   scale_x_discrete(labels = c("Decrease","Increase","No change","PLMP",
                               "Decrease","Increase","No change","ALMP",
                               "Decrease","Increase","No change","Pension")) +
   theme(axis.text.y = element_text(face = c('plain', 'plain', 'plain', 'bold',
                                             'plain', 'plain', 'plain', 'bold',
                                             'plain', 'plain', 'plain', 'bold'))) +
   guides(colour = guide_legend(reverse = TRUE)) +
   guides(shape = guide_legend(reverse = TRUE)) +
   xlab("") + ylab("Marginal mean") +
   ggtitle("Labor Market Policy") + theme(plot.title = element_text(hjust = 0.5, face = "bold")) +
   scale_colour_grey(start = 0.2, end = 0.8, aesthetics = "colour")
mmplot_outsider

#ggsave(mmplot_outsider, width = 17.5, height = 12, units = c("cm"), file ="mmplot_outsider_small.eps")

## people with children u10 vs. people without children u10
mmplot_children_u10 <- ggplot(mm_sum_children_u10[which (mm_sum_children_u10$group == 2 | mm_sum_children_u10$group == 5 | mm_sum_children_u10$group == 6),], aes(x = names_ordered, colour = Group)) +
   geom_errorbar(aes(ymin= low, ymax=high), width=0, size=1, position = position_dodge(0.8)) +
   geom_point(aes(y=mean, shape = Group), size=2, position = position_dodge(0.8)) +
   geom_hline(yintercept = 0.5,size=.5,colour="black",linetype="dotted") +
   coord_flip() +
   theme_bw() + 
   scale_x_discrete(labels = c("Decrease","Increase","No change","Child benefits",
                               "Decrease","Increase","No change","Childcare",
                               "Decrease","Increase","No change","Pension")) +
   theme(axis.text.y = element_text(face = c('plain', 'plain', 'plain', 'bold',
                                             'plain', 'plain', 'plain', 'bold',
                                             'plain', 'plain', 'plain', 'bold'))) +
   guides(colour = guide_legend(reverse = TRUE)) +
   guides(shape = guide_legend(reverse = TRUE)) +
   xlab("") + ylab("Marginal mean") +
   ggtitle("Family Policy") + theme(plot.title = element_text(hjust = 0.5, face = "bold")) +
   scale_colour_grey(start = 0.2, end = 0.8, aesthetics = "colour")
mmplot_children_u10

#ggsave(mmplot_children_u10, width = 17.5, height = 12, units = c("cm"), file ="mmplot_children_u10_small.eps")

## combine the graphs
constituencies <- ggarrange(mmplot_retired, mmplot_edu, mmplot_outsider, mmplot_children_u10, ncol=2, nrow=2, common.legend = TRUE, legend="bottom")
ggsave(constituencies, width = 18, height = 25, units = c("cm"), file ="figure5.eps")

#--------------------------------------------------------
### table 3: estimated marginal means of policy constituency vs. non-constituency ###
#--------------------------------------------------------

## extract estimates from df
# pension
table5_pension <- mm_sum_retired %>% filter(term %in% c("PensionsNochange","PensionsIncrease", "PensionsDecrease"))
table5_pension <- table5_pension %>% subset(select = c(term, mean, high, low, Group))

# education
table5_edu <- mm_sum_eduben %>% filter(term %in% c("EducationNochange","EducationIncrease", "EducationDecrease"))
table5_edu <- table5_edu %>% subset(select = c(term, mean, high, low, Group))

# labor market policy (lmp): almp and plmp
table5_lmp <- mm_sum_outsider %>% filter(term %in% c("ALMPNochange", "ALMPIncrease", "ALMPDecrease","PLMPNochange", "PLMPIncrease", "PLMPDecrease"))
table5_lmp <- table5_lmp %>% subset(select = c(term, mean, high, low, Group))

# family policy (fp): childcare and child benefits
table5_fp <- mm_sum_children_u10 %>% filter(term %in% c("ChildcareNochange", "ChildcareIncrease", "ChildcareDecrease", "Child.benefitsNochange", "Child.benefitsIncrease", "Child.benefitsDecrease"))
table5_fp <- table5_fp %>% subset(select = c(term, mean, high, low, Group))

## merge the results in one df
table5_long <- rbind(table5_edu,table5_pension, table5_lmp, table5_fp)
table5_long <- table5_long %>% rename(Term = term, MarginalMean = mean, UppperCI = high, LowerCI = low)
table5_long$Group <- ifelse(table5_long$Group == "Constituency", "Constituency", "NonConstituency")

## create version without information about confidence intervals
table5_select <- table5_long %>% subset(select = c(Term, MarginalMean, Group))

library(tidyverse) # only load tidyverse here, otherwise the rename function above needs to be adjusted 
table5 <- table5_select %>% spread(Group, MarginalMean)
table5$Difference <- table5$Constituency - table5$NonConstituency

table5 %>% mutate_if(is.numeric, round, digits=3)

table5 <- xtable(table5, digits = 3)
print(table5, file = "table3.tex")

#--------------------------------------------------------
### save the results from the regression models ###
#--------------------------------------------------------

save(coef_sum, mm_sum_retired, mm_sum_eduben, mm_sum_outsider, mm_sum_children_u10, 
     file = "cj_results_main.RData")